In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import plotly.express as px
In [2]:
df = pd.read_csv('covid_19_india.csv')
In [3]:
df.head()
Out[3]:
Sno Date Time State/UnionTerritory ConfirmedIndianNational ConfirmedForeignNational Cured Deaths Confirmed
0 1 2020-01-30 6:00 PM Kerala 1 0 0 0 1
1 2 2020-01-31 6:00 PM Kerala 1 0 0 0 1
2 3 2020-02-01 6:00 PM Kerala 2 0 0 0 2
3 4 2020-02-02 6:00 PM Kerala 3 0 0 0 3
4 5 2020-02-03 6:00 PM Kerala 3 0 0 0 3
In [4]:
#keeping only required columns
df = df[['Date', 'State/UnionTerritory','Cured','Deaths','Confirmed']]
#renaming column names
df.columns = ['date', 'state','cured','deaths','confirmed']
In [5]:
df.shape
Out[5]:
(15554, 5)
In [6]:
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15554 entries, 0 to 15553
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   date       15554 non-null  object
 1   state      15554 non-null  object
 2   cured      15554 non-null  int64 
 3   deaths     15554 non-null  int64 
 4   confirmed  15554 non-null  int64 
dtypes: int64(3), object(2)
memory usage: 607.7+ KB
In [7]:
df.describe()
Out[7]:
cured deaths confirmed
count 1.555400e+04 15554.000000 1.555400e+04
mean 1.898184e+05 2898.898804 2.110835e+05
std 4.061648e+05 7637.201754 4.542665e+05
min 0.000000e+00 0.000000 0.000000e+00
25% 1.950250e+03 14.000000 3.177000e+03
50% 2.141550e+04 374.000000 2.874900e+04
75% 2.151482e+05 2527.000000 2.387830e+05
max 5.395370e+06 95344.000000 5.746892e+06
In [8]:
print(df.columns)
missing_values = pd.isnull(df.columns)
print(missing_values)
Index(['date', 'state', 'cured', 'deaths', 'confirmed'], dtype='object')
[False False False False False]
In [9]:
df.deaths.value_counts()
Out[9]:
0        1718
1         527
2         417
3         195
4         174
         ... 
3685        1
23695       1
3677        1
24151       1
10297       1
Name: deaths, Length: 5287, dtype: int64
In [10]:
df.confirmed.value_counts()
Out[10]:
1         263
2         117
7          92
3          88
33         63
         ... 
92536       1
905591      1
33141       1
750517      1
237552      1
Name: confirmed, Length: 12522, dtype: int64
In [11]:
today = df[df.date == '2021-05-19']
today
Out[11]:
date state cured deaths confirmed
15050 2021-05-19 Andaman and Nicobar Islands 6359 92 6674
15051 2021-05-19 Andhra Pradesh 1254291 9580 1475372
15052 2021-05-19 Arunachal Pradesh 19977 88 22462
15053 2021-05-19 Assam 290774 2344 340858
15054 2021-05-19 Bihar 595377 4039 664115
15055 2021-05-19 Chandigarh 48831 647 56513
15056 2021-05-19 Chhattisgarh 823113 12036 925531
15057 2021-05-19 Dadra and Nagar Haveli and Daman and Diu 8944 4 9652
15058 2021-05-19 Delhi 1329899 22111 1402873
15059 2021-05-19 Goa 112633 2197 138776
15060 2021-05-19 Gujarat 660489 9269 766201
15061 2021-05-19 Haryana 626852 6923 709689
15062 2021-05-19 Himachal Pradesh 129330 2460 166678
15063 2021-05-19 Jammu and Kashmir 197701 3293 251919
15064 2021-05-19 Jharkhand 284805 4601 320934
15065 2021-05-19 Karnataka 1674487 22838 2272374
15066 2021-05-19 Kerala 1846105 6612 2200706
15067 2021-05-19 Ladakh 15031 170 16784
15068 2021-05-19 Lakshadweep 3915 15 5212
15069 2021-05-19 Madhya Pradesh 652612 7139 742718
15070 2021-05-19 Maharashtra 4927480 83777 5433506
15071 2021-05-19 Manipur 33466 612 40683
15072 2021-05-19 Meghalaya 19185 355 24872
15073 2021-05-19 Mizoram 7094 29 9252
15074 2021-05-19 Nagaland 14079 228 18714
15075 2021-05-19 Odisha 536595 2357 633302
15076 2021-05-19 Puducherry 69060 1212 87749
15077 2021-05-19 Punjab 427058 12317 511652
15078 2021-05-19 Rajasthan 713129 7080 879664
15079 2021-05-19 Sikkim 8427 212 11689
15080 2021-05-19 Tamil Nadu 1403052 18369 1664350
15081 2021-05-19 Telangana 485644 3012 536766
15082 2021-05-19 Tripura 36402 450 42776
15083 2021-05-19 Uttarakhand 214426 5132 295790
15084 2021-05-19 Uttar Pradesh 1483249 18072 1637663
15085 2021-05-19 West Bengal 1026492 13576 1171861
In [12]:
#Sorting data with number of confirmed cases
max_confirmed_cases=today.sort_values(by="confirmed",ascending=False)
max_confirmed_cases
Out[12]:
date state cured deaths confirmed
15070 2021-05-19 Maharashtra 4927480 83777 5433506
15065 2021-05-19 Karnataka 1674487 22838 2272374
15066 2021-05-19 Kerala 1846105 6612 2200706
15080 2021-05-19 Tamil Nadu 1403052 18369 1664350
15084 2021-05-19 Uttar Pradesh 1483249 18072 1637663
15051 2021-05-19 Andhra Pradesh 1254291 9580 1475372
15058 2021-05-19 Delhi 1329899 22111 1402873
15085 2021-05-19 West Bengal 1026492 13576 1171861
15056 2021-05-19 Chhattisgarh 823113 12036 925531
15078 2021-05-19 Rajasthan 713129 7080 879664
15060 2021-05-19 Gujarat 660489 9269 766201
15069 2021-05-19 Madhya Pradesh 652612 7139 742718
15061 2021-05-19 Haryana 626852 6923 709689
15054 2021-05-19 Bihar 595377 4039 664115
15075 2021-05-19 Odisha 536595 2357 633302
15081 2021-05-19 Telangana 485644 3012 536766
15077 2021-05-19 Punjab 427058 12317 511652
15053 2021-05-19 Assam 290774 2344 340858
15064 2021-05-19 Jharkhand 284805 4601 320934
15083 2021-05-19 Uttarakhand 214426 5132 295790
15063 2021-05-19 Jammu and Kashmir 197701 3293 251919
15062 2021-05-19 Himachal Pradesh 129330 2460 166678
15059 2021-05-19 Goa 112633 2197 138776
15076 2021-05-19 Puducherry 69060 1212 87749
15055 2021-05-19 Chandigarh 48831 647 56513
15082 2021-05-19 Tripura 36402 450 42776
15071 2021-05-19 Manipur 33466 612 40683
15072 2021-05-19 Meghalaya 19185 355 24872
15052 2021-05-19 Arunachal Pradesh 19977 88 22462
15074 2021-05-19 Nagaland 14079 228 18714
15067 2021-05-19 Ladakh 15031 170 16784
15079 2021-05-19 Sikkim 8427 212 11689
15057 2021-05-19 Dadra and Nagar Haveli and Daman and Diu 8944 4 9652
15073 2021-05-19 Mizoram 7094 29 9252
15050 2021-05-19 Andaman and Nicobar Islands 6359 92 6674
15068 2021-05-19 Lakshadweep 3915 15 5212
In [13]:
#Getting states with maximum number of confirmed cases
top_states_confirmed=max_confirmed_cases[0:5]
In [14]:
#Making bar-plot for states with top confirmed cases
sns.set(rc={'figure.figsize':(20,10)})
sns.barplot(x="state",y="confirmed",data=top_states_confirmed,hue="state")
plt.show()
In [15]:
#Sorting data with number of death cases
max_death_cases=today.sort_values(by="deaths",ascending=False)
max_death_cases
Out[15]:
date state cured deaths confirmed
15070 2021-05-19 Maharashtra 4927480 83777 5433506
15065 2021-05-19 Karnataka 1674487 22838 2272374
15058 2021-05-19 Delhi 1329899 22111 1402873
15080 2021-05-19 Tamil Nadu 1403052 18369 1664350
15084 2021-05-19 Uttar Pradesh 1483249 18072 1637663
15085 2021-05-19 West Bengal 1026492 13576 1171861
15077 2021-05-19 Punjab 427058 12317 511652
15056 2021-05-19 Chhattisgarh 823113 12036 925531
15051 2021-05-19 Andhra Pradesh 1254291 9580 1475372
15060 2021-05-19 Gujarat 660489 9269 766201
15069 2021-05-19 Madhya Pradesh 652612 7139 742718
15078 2021-05-19 Rajasthan 713129 7080 879664
15061 2021-05-19 Haryana 626852 6923 709689
15066 2021-05-19 Kerala 1846105 6612 2200706
15083 2021-05-19 Uttarakhand 214426 5132 295790
15064 2021-05-19 Jharkhand 284805 4601 320934
15054 2021-05-19 Bihar 595377 4039 664115
15063 2021-05-19 Jammu and Kashmir 197701 3293 251919
15081 2021-05-19 Telangana 485644 3012 536766
15062 2021-05-19 Himachal Pradesh 129330 2460 166678
15075 2021-05-19 Odisha 536595 2357 633302
15053 2021-05-19 Assam 290774 2344 340858
15059 2021-05-19 Goa 112633 2197 138776
15076 2021-05-19 Puducherry 69060 1212 87749
15055 2021-05-19 Chandigarh 48831 647 56513
15071 2021-05-19 Manipur 33466 612 40683
15082 2021-05-19 Tripura 36402 450 42776
15072 2021-05-19 Meghalaya 19185 355 24872
15074 2021-05-19 Nagaland 14079 228 18714
15079 2021-05-19 Sikkim 8427 212 11689
15067 2021-05-19 Ladakh 15031 170 16784
15050 2021-05-19 Andaman and Nicobar Islands 6359 92 6674
15052 2021-05-19 Arunachal Pradesh 19977 88 22462
15073 2021-05-19 Mizoram 7094 29 9252
15068 2021-05-19 Lakshadweep 3915 15 5212
15057 2021-05-19 Dadra and Nagar Haveli and Daman and Diu 8944 4 9652
In [16]:
#Getting states with maximum number of death cases
top_states_death=max_death_cases[0:5]
In [17]:
fig = px.bar(top_states_death,x="state",y="deaths")
fig.show()
In [18]:
#Making bar-plot for states with top death cases
sns.set(rc={'figure.figsize':(15,10)})
sns.barplot(x="state",y="deaths",data=top_states_death,hue="state")
plt.show()
In [19]:
#Rajasthan
RJ = df[df.state == 'Rajasthan']
RJ
Out[19]:
date state cured deaths confirmed
36 2020-03-03 Rajasthan 0 0 1
44 2020-03-04 Rajasthan 0 0 15
48 2020-03-05 Rajasthan 0 0 15
54 2020-03-06 Rajasthan 0 0 15
64 2020-03-07 Rajasthan 0 0 15
... ... ... ... ... ...
15402 2021-05-28 Rajasthan 851998 8103 931200
15438 2021-05-29 Rajasthan 863175 8181 933848
15474 2021-05-30 Rajasthan 871283 8251 936162
15510 2021-05-31 Rajasthan 880919 8317 938460
15546 2021-06-01 Rajasthan 888919 8385 939958

456 rows × 5 columns

In [20]:
#Visualizing confirmed cases in rajasthan
#sns.set(rc={'figure.figsize':(15,10)})
sns.lineplot(x="date",y="confirmed",data=RJ,color="r")
plt.show()
In [21]:
#Visualizing death cases in rajasthan
sns.set(rc={'figure.figsize':(15,10)})
sns.lineplot(x="date",y="deaths",data=RJ,color="r")
plt.show()
In [22]:
pip install plotly
Requirement already satisfied: plotly in c:\users\admin\anaconda3\lib\site-packages (4.14.3)
Requirement already satisfied: six in c:\users\admin\anaconda3\lib\site-packages (from plotly) (1.15.0)
Requirement already satisfied: retrying>=1.3.3 in c:\users\admin\anaconda3\lib\site-packages (from plotly) (1.3.3)
Note: you may need to restart the kernel to use updated packages.
In [37]:
fig = px.scatter(RJ,x="date",y="confirmed")
fig.show()
In [23]:
fig = px.scatter(RJ,x="date",y="deaths")
fig.show()
In [24]:
fig = px.bar(RJ, x='date', y='deaths')
fig.show()
In [25]:
#Maharashtra
MH = df[df.state == 'Maharashtra']
MH
Out[25]:
date state cured deaths confirmed
76 2020-03-09 Maharashtra 0 0 2
91 2020-03-10 Maharashtra 0 0 5
97 2020-03-11 Maharashtra 0 0 2
120 2020-03-12 Maharashtra 0 0 11
133 2020-03-13 Maharashtra 0 0 14
... ... ... ... ... ...
15394 2021-05-28 Maharashtra 5276203 92225 5672180
15430 2021-05-29 Maharashtra 5307874 93198 5692920
15466 2021-05-30 Maharashtra 5339838 94030 5713215
15502 2021-05-31 Maharashtra 5362370 94844 5731815
15538 2021-06-01 Maharashtra 5395370 95344 5746892

450 rows × 5 columns

In [26]:
#Visualizing confirmed cases in maharashtra

sns.lineplot(x="date",y="confirmed",data=MH,color="r")
plt.show()
In [27]:
#Visualizing death cases in maharashtra
sns.set(rc={'figure.figsize':(15,10)})
sns.lineplot(x="date",y="deaths",data=MH,color="r")
plt.show()
In [28]:
fig = px.scatter(MH,x="date",y="confirmed")
fig.show()
In [29]:
fig = px.scatter(MH,x="date",y="deaths")
fig.show()
In [28]:
fig = px.bar(MH, x='date', y='deaths')
fig.show()
In [29]:
#Kerala
KL = df[df.state == 'Kerala']
KL
Out[29]:
date state cured deaths confirmed
0 2020-01-30 Kerala 0 0 1
1 2020-01-31 Kerala 0 0 1
2 2020-02-01 Kerala 0 0 2
3 2020-02-02 Kerala 0 0 3
4 2020-02-03 Kerala 0 0 3
... ... ... ... ... ...
15390 2021-05-28 Kerala 2198135 8063 2448554
15426 2021-05-29 Kerala 2224405 8257 2470872
15462 2021-05-30 Kerala 2252505 8455 2494385
15498 2021-05-31 Kerala 2281518 8641 2514279
15534 2021-06-01 Kerala 2310385 8815 2526579

489 rows × 5 columns

In [30]:
#Visualizing confirmed cases in kerala

sns.lineplot(x="date",y="confirmed",data=KL,color="r")
plt.show()
In [31]:
fig = px.scatter(KL,x="date",y="confirmed")
fig.show()
In [32]:
#Visualizing death cases in kerala
sns.set(rc={'figure.figsize':(15,10)})
sns.lineplot(x="date",y="deaths",data=KL,color="r")
plt.show()
In [33]:
fig = px.scatter(KL,x="date",y="deaths")
fig.show()
In [34]:
fig = px.bar(KL, x='date', y='deaths')
fig.show()
In [ ]: